###########################################
#  Primary Divisions: How Voters Evaluate Policy and Group Differences in Intra-Party Contests
#   - Forthcoming at The Journal of Politics
#   - Henderson et al 2021
#
###########################################
#  - code by S. Goggin & J. Henderson
########################################################
# This file produces descriptives for the sophistication measure for our experimental modules
########################################################

#dirs="~/Dropbox/replication0/"
#dirs should be set here or in runR.R 

rm(list=ls()[which(ls()!='dirs')])
library(ggplot2)
library(stringr)

# messy function to reorder by some estimate value
lableOrder=function(xmat,labels,label.groups,omits,o.column){

	# denote which label is to be omitted on the label
	for(i in 1:length(omits)){
		labels[which(labels==omits[i])]=paste('omit',labels[which(labels==omits[i])],sep='_')
	}

	# break groups into levels
	un_group=unique(label.groups)

	# this is the item to sort on, typically global or independent
	xm=xmat[,o.column]

	# vector which will contain row order
	xo=1:length(xm)

	# rearranging roworder within level
	for(j in 1:length(un_group)){
		ix=which(label.groups==un_group[j])
		if(length(ix)>2){
			ix=ix[!grepl(labels[ix],pattern='omit')]
			xo[ix]=xo[ix][order(xm[ix])]
		}
	}
	return(xmat[xo,])
}

reOrder=function(x,o){
	ix=array(NA,nrow(x))
	for(i in 1:length(o)){
		ix[i]=which(x$iv_order==o[i])
	}
	return(x[ix,])
}

setwd(dirs)

load('data/data_matrix_scored.Rdata')
scored=read.csv('data/candidate_matrix.csv')

###########################################
###########################################

library(car)

###########################################
###Then, produce models w/ Clustered SEs

#Function from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
clse.f <- function(dat,fm, cluster){
 require(sandwich)
 require(lmtest)
 not <- attr(fm$model,"na.action")
if( ! is.null(not)){
  cluster <- cluster[-not]
    dat <- dat[-not,]
}
 with(dat,{
 M <- length(unique(cluster))
 N <- length(cluster)
 K <- fm$rank
 dfc <- (M/(M-1))*((N-1)/(N-K))
 uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
 vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
 coeftest(fm, vcovCL)
 }
 )
}

data_matrix=data_matrix[,-c(grep(names(data_matrix),pattern='scores'))]
candidate_matrix=cbind(data_matrix,scored[,c('scores','scores_var','scores_policy','scores_policy_var')])

# i_policy is conjoint position in either i1 or i2
# consistency here is candidate too consistent Dem or Rep position on policy
# analogous to above variance in aggregating over candidate profiles on issues

candidate_matrix$consistency=
as.numeric(candidate_matrix$"i_lgbt"==1)*-1+
as.numeric(candidate_matrix$"i_marriage"==1)*1+
as.numeric(candidate_matrix$"i_need"==1)*-1+
as.numeric(candidate_matrix$"i_govabuse"==1)*1+
as.numeric(candidate_matrix$"i_freetrade"==1)*-1+
as.numeric(candidate_matrix$"i_unfairtrade"==1)*1+
as.numeric(candidate_matrix$"i_righttochoose"==1)*-1+
as.numeric(candidate_matrix$"i_unbornlives"==1)*1+
as.numeric(candidate_matrix$"i_raisetaxes"==1)*-1+
as.numeric(candidate_matrix$"i_cuttaxes"==1)*1+
as.numeric(candidate_matrix$"i_co2emissions"==1)*-1+
as.numeric(candidate_matrix$"i_drilling"==1)*1+
as.numeric(candidate_matrix$"i_citizenship"==1)*-1+
as.numeric(candidate_matrix$"i_bordersecurity"==1)*1+
as.numeric(candidate_matrix$"i_guncontrol"==1)*-1+
as.numeric(candidate_matrix$"i_gunrights"==1)*1+
as.numeric(candidate_matrix$"i_reducemilitary"==1)*-1+
as.numeric(candidate_matrix$"i_strengthenmilitary"==1)*1+
as.numeric(candidate_matrix$"i_policing"==1)*-1+
as.numeric(candidate_matrix$"i_criminals"==1)*1

candidate_matrix$libcon=rowMeans(cbind(candidate_matrix[,grep(names(candidate_matrix),pattern='libcon')],candidate_matrix$self_place/3))

############################################################
############################################################
############################################################
############################################################

load('data/issueCrossPressureData.Rdata')

voter_id=unique(candidate_matrix$respondent)
voter_matrix=as.data.frame(matrix(NA,length(voter_id),ncol(candidate_matrix)))
for(i in 1:length(voter_id)){
  ix=which(candidate_matrix$respondent==voter_id[i])[1]
  voter_matrix[i,]=candidate_matrix[ix,]
}
names(voter_matrix)=names(candidate_matrix)
# identify rank order of which components correlate with summary w/n each dimension
# which items are more consistently answered

# frequency of mis-classifications across issues
id=which(voter_matrix$pid==-1)
ir=which(voter_matrix$pid==1)
xmu=c(as.numeric(voter_matrix$libcon_envs[id]>0)+
as.numeric(voter_matrix$libcon_crme[id]>0)+
as.numeric(voter_matrix$libcon_abrt[id]>0)+
as.numeric(voter_matrix$libcon_gays[id]>0)+
as.numeric(voter_matrix$libcon_immi[id]>0)+
as.numeric(voter_matrix$libcon_trad[id]>0)+
as.numeric(voter_matrix$libcon_guns[id]>0)+
as.numeric(voter_matrix$libcon_dfns[id]>0)+
as.numeric(voter_matrix$libcon_taxs[id]>0)+
as.numeric(voter_matrix$libcon_need[id]>0),
as.numeric(voter_matrix$libcon_envs[ir]<0)+
as.numeric(voter_matrix$libcon_crme[ir]<0)+
as.numeric(voter_matrix$libcon_abrt[ir]<0)+
as.numeric(voter_matrix$libcon_gays[ir]<0)+
as.numeric(voter_matrix$libcon_immi[ir]<0)+
as.numeric(voter_matrix$libcon_trad[ir]<0)+
as.numeric(voter_matrix$libcon_guns[ir]<0)+
as.numeric(voter_matrix$libcon_dfns[ir]<0)+
as.numeric(voter_matrix$libcon_taxs[ir]<0)+
as.numeric(voter_matrix$libcon_need[ir]<0))

ymu=c(as.numeric(voter_matrix$libcon_envs[id]<0)+
as.numeric(voter_matrix$libcon_crme[id]<0)+
as.numeric(voter_matrix$libcon_abrt[id]<0)+
as.numeric(voter_matrix$libcon_gays[id]<0)+
as.numeric(voter_matrix$libcon_immi[id]<0)+
as.numeric(voter_matrix$libcon_trad[id]<0)+
as.numeric(voter_matrix$libcon_guns[id]<0)+
as.numeric(voter_matrix$libcon_dfns[id]<0)+
as.numeric(voter_matrix$libcon_taxs[id]<0)+
as.numeric(voter_matrix$libcon_need[id]<0),
as.numeric(voter_matrix$libcon_envs[ir]>0)+
as.numeric(voter_matrix$libcon_crme[ir]>0)+
as.numeric(voter_matrix$libcon_abrt[ir]>0)+
as.numeric(voter_matrix$libcon_gays[ir]>0)+
as.numeric(voter_matrix$libcon_immi[ir]>0)+
as.numeric(voter_matrix$libcon_trad[ir]>0)+
as.numeric(voter_matrix$libcon_guns[ir]>0)+
as.numeric(voter_matrix$libcon_dfns[ir]>0)+
as.numeric(voter_matrix$libcon_taxs[ir]>0)+
as.numeric(voter_matrix$libcon_need[ir]>0))

names(xmu)=c(id,ir)
names(ymu)=c(id,ir)

# kinder and kalmoe + identify the 20-30% who score highest on ideological knowledge/consistency
k_gov=voter_matrix$know_gov
k_sen1=voter_matrix$know_senate1
k_sen2=voter_matrix$know_senate2

k_gov[which(voter_matrix$know_gov=='Republican')]=1
k_gov[grep(voter_matrix$know_gov,pattern='Independent')]=0
k_gov[which(voter_matrix$know_gov=='Democrat')]=-1
k_gov[!grepl(k_gov,pattern='[0,1]')]=-2

k_sen1[which(voter_matrix$know_senate1=='Republican')]=1
k_sen1[grep(voter_matrix$know_senate1,pattern='Independent')]=0
k_sen1[which(voter_matrix$know_senate1=='Democrat')]=-1
k_sen1[!grepl(k_sen1,pattern='[0,1]')]=-2

k_sen2[which(voter_matrix$know_senate2=='Republican')]=1
k_sen2[grep(voter_matrix$know_senate2,pattern='Independent')]=0
k_sen2[which(voter_matrix$know_senate2=='Democrat')]=-1
k_sen2[!grepl(k_sen2,pattern='[0,1]')]=-2

k_gov=as.numeric(k_gov)
k_sen1=as.numeric(k_sen1)
k_sen2=as.numeric(k_sen2)

sen2=sign(tapply(k_sen2[which(k_sen2> -2)],voter_matrix$state[which(k_sen2> -2)],mean,na.rm=T)) # sanders and angus king
sen2[which(names(sen2)=='Wyoming')]=1
sen2[which(names(sen2)=='Vermont')]=0
sen2[which(names(sen2)=='Maine')]=0
sen1=sign(tapply(k_sen1[which(k_sen1> -2)],voter_matrix$state[which(k_sen1> -2)],mean,na.rm=T))
gov=sign(tapply(k_gov[which(k_gov> -2)],voter_matrix$state[which(k_gov> -2)],mean,na.rm=T))

know_gov=know_senate2=know_senate1=array('Democrat',nrow(voter_matrix))
un_state=unique(voter_matrix$state)
for(j in 1:length(un_state)){
	if(un_state[j]!="District of Columbia"){

		ix=which(voter_matrix$state==un_state[j])

		if(gov[which(names(gov)==un_state[j])]==1){
			know_gov[ix]='Republican'
		}
		if(gov[which(names(gov)==un_state[j])]==0){
			know_gov[ix]='Other Party / Independent'
		}

		if(sen1[which(names(sen1)==un_state[j])]==1){
			know_senate1[ix]='Republican'
		}
		if(sen1[which(names(sen1)==un_state[j])]==0){
			know_senate1[ix]='Other Party / Independent'
		}

		if(sen2[which(names(sen2)==un_state[j])]==1){
			know_senate2[ix]='Republican'
		}
		if(sen2[which(names(sen2)==un_state[j])]==0){
			know_senate2[ix]='Other Party / Independent'
		}

	}
}


# knowledge battery
X=cbind(as.numeric(voter_matrix$know_senate1==know_senate1), 	# know party of sen1
as.numeric(voter_matrix$know_senate2==know_senate2),					# know party of sen2
as.numeric(voter_matrix$know_gov==know_gov),									# know party of gov
as.numeric(voter_matrix$us_house_majority=='Republicans'),		# know house majority
as.numeric(voter_matrix$us_senate_majority=='Republicans'),		# know senate majority
as.numeric(voter_matrix$self_place_attempt==1),								# attempt self place
as.numeric(voter_matrix$dem_place<voter_matrix$rep_place))		# place dems left of reps

colnames(X)=c('know_sen1','know_sen2','know_gov','know_house_maj','know_sen_maj','know_any_self_place','know_D_left_R')

#table iv: summary cces survey items to measure party knowledge
library(xtable)
print(xtable(as.matrix(colMeans(X,na.rm=T)[c(4,5,3,1,2,7,6)]),digits=3),file='appendix/figures/knowledge_summary.tex')
#Xl=cbind(
	#as.numeric(voter_matrix$know_senate1==know_senate1), 				# know party of sen1
	#as.numeric(voter_matrix$know_senate2==know_senate2),					# know party of sen2
	#as.numeric(voter_matrix$know_gov==know_gov),									# know party of gov
#as.numeric(voter_matrix$us_house_majority=='Republicans'),		# know house majority
#as.numeric(voter_matrix$us_senate_majority=='Republicans'),		# know senate majority
#as.numeric(voter_matrix$self_place_attempt==1),								# attempt self place
#as.numeric(voter_matrix$dem_place<voter_matrix$rep_place))		# place dems left of reps
#plot(table(rowSums(Xl)))
#mean(rowSums(Xl)/4)

####################################
# Scaling voters on policie and knowledge
# SCALING IRT KNOWLEDGE; account for harder/easier items
library(MCMCpack)

if(!file.exists('data/posterior2.Rdata')){
	posterior2 <- MCMCirt1d(X,
		theta.constraints=list(K="+", N="-"),
		B0.alpha=.2, B0.beta=.2,
		burnin=500, mcmc=5000, thin=20, verbose=500,
		store.item=TRUE,seed=1005
	)
	save(posterior2,file='data/posterior2.Rdata')
} else {
	load('data/posterior2.Rdata')
}

#geweke.diag(posterior2)
#plot(posterior2)
sumsK=summary(posterior2)
thetaK=-sumsK$statistics[,1][grep(names(sumsK$statistics[,1]),pattern='theta')]
#plot(rowSums(X),thetaK)
#cor.test(rowSums(X),thetaK)

# policy battery
Y=voter_matrix[,c("envs_policy1","envs_policy2","envs_policy3","envs_policy4",
"crme_policy1","crme_policy2","crme_policy3","crme_policy4",
"abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6",
"gays_policy1",
"immi_policy1","immi_policy2","immi_policy3","immi_policy4","immi_policy5","immi_policy6","immi_policy7","immi_policy8",
"roll_tpp_act","roll_trd_adj",
"guns_policy1","guns_policy2","guns_policy3","guns_policy4",
"dfns_policy2","roll_usa_fre","roll_iransct",
"roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage")]

#rownames(voter_matrix)[which(sign(voter_matrix$libcon)==-1)[1]]='L'
#rownames(voter_matrix)[which(sign(voter_matrix$libcon)==1)[1]]='R'
#rownames(voter_matrix)[which(rowSums(X)==0)[20]]='N'
#rownames(voter_matrix)[which(rowSums(X)==7)[20]]='K'

if(!file.exists('data/posterior1.Rdata')){
	posterior1 <- MCMCirt1d(Y,
		theta.constraints=list(R="+", L="-"),
		B0.alpha=.2, B0.beta=.2,
		burnin=500, mcmc=5000, thin=20, verbose=500,
		store.item=TRUE,seed=1005
	)
	save(posterior1,file='data/posterior1.Rdata')
} else {
	load('data/posterior1.Rdata')
}
#geweke.diag(posterior1)
#plot(posterior1)
sums1=summary(posterior1)
theta1=-sums1$statistics[,1][grep(names(sums1$statistics[,1]),pattern='theta')]
#plot(voter_matrix$libcon,theta1)
#cor.test(voter_matrix$libcon,theta1)

thetaZ=theta1

########################################################
########################################################
########################################################
# Descriptive Analyses and Figures Here
########################################################
########################################################
########################################################


#################################
#### plot density/hist of X correct + frequencies
# amoung partisans
ii=c(id,ir)
Xo=X[ii,]
summary(Xo)

### POOLING OVER ALL D AND R
pdf('appendix/figures/know_party_items_frequency.pdf')
hist(rowSums(Xo),xlab='Number of Correct Answers on 7 Party Knowledge Questions',main='',breaks=6)
dev.off()

i0=order(decreasing=T,colMeans(Xo))
X0=Xo[,i0]
pdf('appendix/figures/know_party_items_cumulative.pdf')
plot(main='',x=-10000,y=-1000,ylim=c(0,1),xlim=c(1,7),axes=F,xlab='',ylab='Percent Correct')
for(j in 1:ncol(X0)){
	points(x=j,y=mean(X0[,j]),col='blue') # margin
	points(x=j,y=mean(X0[,1:j]),col='darkblue',pch=19) # cumulative
	if(j > 1){
		lines(x=c(j,j-1),y=c(mean(X0[,1:j]),mean(X0[,1:(j-1)])),col='darkblue',lty=1)
		lines(x=c(j,j-1),y=c(mean(X0[,j]),mean(X0[,(j-1)])),col='blue',lty=2)
	}
}
axis(2)
axis(1,at=c(1,2,3,4,5,6,7),labels=FALSE)
legend(x=1,y=.2,legend=c('Marginal Correct','Cumulative Correct'),fill=c('darkblue','blue'),density=c(100,40),border='white')
lablist=c('Self Place       ','Know D < R       ', 'Know Gov. Pty.   ', 'Know Sen. II Pty.',
'Know Sen. I Pty. ', 'Know Hou. Maj.   ','Know Sen. Maj.  ')
text(.25+seq(1, 7, by=1), par("usr")[3] - 0.085, labels = lablist, srt = 310, pos = 1, xpd = TRUE,cex=.8)
dev.off()

#pdf('/appendix/figures/know_party_items_frequency.pdf')
#plot(table(rowSums(X)),xlab='Number of Correct Answers on 7 Party Knowledge Questions',main='')
#dev.off()


#################################
#### plot density/hist of
### POOLING OVER ALL D AND R
pdf('appendix/figures/libcon_issue_inconsistency.pdf')
# frequency of cross/out-party positions
hist(xmu,xlab='Number of Party-Inconsistent Attitudes on 10 Issues',main='',breaks=8)
dev.off()
# frequency of in-party positions
pdf('appendix/figures/libcon_issue_consistency.pdf')
hist(ymu,xlab='Number of Party-Consistent Attitudes on 10 Issues',main='',breaks=8)
dev.off()

### POOLING OVER ALL D AND R; HIGH and LOW KNOWLEDGE
pdf('appendix/figures/libcon_issue_inconsistency_high_know.pdf')
# frequency of cross/out-party positions
hist(xmu[which(rowSums(Xo)>=6)],xlab='Number of Party-Inconsistent Attitudes on 10 Issues',main='',breaks=8)
dev.off()
# frequency of in-party positions
pdf('appendix/figures/libcon_issue_consistency_high_know.pdf')
hist(ymu[which(rowSums(Xo)>=6)],xlab='Number of Party-Consistent Attitudes on 10 Issues',main='',breaks=8)
dev.off()

pdf('appendix/figures/libcon_issue_inconsistency_low_know.pdf')
# frequency of cross/out-party positions
hist(xmu[which(rowSums(Xo)<6)],xlab='Number of Party-Inconsistent Attitudes on 10 Issues',main='',breaks=8)
dev.off()
# frequency of in-party positions
pdf('appendix/figures/libcon_issue_consistency_low_know.pdf')
hist(ymu[which(rowSums(Xo)<6)],xlab='Number of Party-Consistent Attitudes on 10 Issues',main='',breaks=8)
dev.off()

for(k in 1:7){
	pdf(paste('appendix/figures/libcon_issue_inconsistency_know_',k,'.pdf',sep=''))
	# frequency of cross/out-party positions
	hist(xmu[which(rowSums(Xo)==k | rowSums(Xo)==k-1)],xlab='Number of Party-Inconsistent Attitudes on 10 Issues',main='',breaks=8)
	dev.off()
	# frequency of in-party positions
	pdf(paste('appendix/figures/libcon_issue_consistency_know_',k,'.pdf',sep=''))
	hist(ymu[which(rowSums(Xo)==k | rowSums(Xo)==k-1)],xlab='Number of Party-Consistent Attitudes on 10 Issues',main='',breaks=8)
	dev.off()
}

for(k in 6:7){
	#pdf(paste('appendix/figures/libcon_issue_inconsistency_know_',k,'.pdf',sep=''))
	## frequency of cross/out-party positions
	#hist(xmu[which(rowSums(Xo)==k | rowSums(Xo)==k-1)],xlab='Number of Party-Inconsistent Attitudes on 10 Issues',main='',breaks=8)
	#dev.off()
	# frequency of in-party positions
	x=hist(breaks=8,ymu[which(rowSums(Xo)==k)]);dev.off()
	pdf(paste('appendix/figures/libcon_issue_consistency_know_',k,'.pdf',sep=''))
	cuts=cut(x$breaks, c(-Inf,7,9))
	plot(x,xlab='Number of Party-Consistent Attitudes on 10 Issues',main='',col=c("white","darkgrey")[cuts],xlim=c(0,10),ylim=c(0,550))
	dev.off()
}


for(k in 0:4){
	#pdf(paste('appendix/figures/libcon_issue_inconsistency_know_',k,'.pdf',sep=''))
	## frequency of cross/out-party positions
	#hist(xmu[which(rowSums(Xo)==k | rowSums(Xo)==k-1)],xlab='Number of Party-Inconsistent Attitudes on 10 Issues',main='',breaks=8)
	#dev.off()
	# frequency of in-party positions
	x=hist(breaks=8,ymu[which(rowSums(Xo)==k)]);dev.off()
	pdf(paste('appendix/figures/libcon_issue_consistency_know_',k,'.pdf',sep=''))
	cuts=cut(x$breaks, c(-Inf,7))
	plot(x,xlab='Number of Party-Consistent Attitudes on 10 Issues',main='',col=c("lightgrey","white")[cuts],xlim=c(0,10),ylim=c(0,550))
	dev.off()
}


for(k in 5){
	#pdf(paste('appendix/figures/libcon_issue_inconsistency_know_',k,'.pdf',sep=''))
	## frequency of cross/out-party positions
	#hist(xmu[which(rowSums(Xo)==k | rowSums(Xo)==k-1)],xlab='Number of Party-Inconsistent Attitudes on 10 Issues',main='',breaks=8)
	#dev.off()
	# frequency of in-party positions
	x=hist(breaks=8,ymu[which(rowSums(Xo)==k)]);dev.off()
	pdf(paste('appendix/figures/libcon_issue_consistency_know_',k,'.pdf',sep=''))
	cuts=cut(x$breaks, c(-Inf,7))
	plot(x,xlab='Number of Party-Consistent Attitudes on 10 Issues',main='',col=c("white","white")[cuts],xlim=c(0,10),ylim=c(0,550))
	dev.off()
}

################################################################################
################################################################################
# inter item correlations and reliability benchmarks
################################################################################
################################################################################

# avg inter-item corr in 37 issue items
library(psych)
x.corr=0
n0=0
for(i in 1:(ncol(Y)-1)){
	for(j in (i+1):ncol(Y)){
		x.corr=x.corr+abs(cor(Y[,i],Y[,j]))
		n0=n0+1
	}
}
cronA=alpha(Y,check.keys=TRUE)
#  raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
      #0.88      0.87    0.89      0.16 6.9 0.0021 0.41 0.2     0.14
# cronbach alpha is .88 => items are mapping same dimension reliably  | evidence of internal consistency w/n sample
rho=(x.corr/n0)
#     0.1607636
# average iter-item correlation is .16; strikes me as rather high, but relevant to benchmark

################################################################################
# BENCHMARK alpha and rho by high and low knowledge
################################################################################
# fix on most knowledgeable
C7=A7=array(NA,8)
iq=which(rowSums(X)==7)
cronA_iq=alpha(Y[iq,],check.keys=TRUE)
cronA_niq=alpha(Y[-c(iq),],check.keys=TRUE)

#(cronA_iq$total)[1]
#(cronA_niq$total)[1]
for(k in 1:8){
	iq=which(rowSums(X)==(k-1))
	A7[k]=alpha(Y[iq,],check.keys=TRUE)$total[1]

	x.corr=0
	n0=0
	for(i in 1:(ncol(Y)-1)){
		for(j in (i+1):ncol(Y)){
			x.corr=x.corr+abs(cor(Y[iq,i],Y[iq,j]))
			n0=n0+1
		}
	}
	C7[k]=x.corr/n0
}

A7=unlist(A7)

A7; #0.7008052 0.6568972 0.7052517 0.7433666 0.7711684 0.8271232 0.8903477 0.9319513
C7; # 0.10942065 0.08543354 0.08814400 0.09415325 0.09988723 0.12404456 0.17816766 0.26201474
A7/A7[8]
#[1] 0.4176126 0.3260639 0.3364086 0.3593433 0.3812275 0.4734259 0.6799910 1.0000000
C7/C7[8]
#[1] 0.7519762 0.7048621 0.7567473 0.7976453 0.8274771 0.8875176 0.9553586 1.0000000
# - stratifying on least knowledgeable, gets reliability and correlation at 40% and 75% of that from
# stratifying on most knowledgeable
# half-full/empty, but tells an important point that (in)consistency here isn't just garbage data w/ error

# inter-item correlations w/n issue ares
cor(voter_matrix[,c("envs_policy1","envs_policy2","envs_policy3","envs_policy4")])
cor(voter_matrix[,c("crme_policy1","crme_policy2","crme_policy3","crme_policy4")])
cor(voter_matrix[,c("abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6")])
cor(voter_matrix[,c("gays_policy1","gays_policy1")])
cor(voter_matrix[,c("immi_policy1","immi_policy2","immi_policy3","immi_policy4","immi_policy5","immi_policy6","immi_policy7","immi_policy8")])
cor(voter_matrix[,c("roll_tpp_act","roll_trd_adj")])
cor(voter_matrix[,c("guns_policy1","guns_policy2","guns_policy3","guns_policy4")])
cor(voter_matrix[,c("dfns_policy2","roll_usa_fre","roll_iransct")])
cor(voter_matrix[,c("roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage")])
cor(voter_matrix[,c("roll_medicar","roll_rpl_aca","roll_minwage")])

c(mean(abs(cor(voter_matrix[,c("envs_policy1","envs_policy2","envs_policy3","envs_policy4")])[lower.tri(x=cor(voter_matrix[,c("envs_policy1","envs_policy2","envs_policy3","envs_policy4")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("crme_policy1","crme_policy2","crme_policy3","crme_policy4")])[lower.tri(x=cor(voter_matrix[,c("crme_policy1","crme_policy2","crme_policy3","crme_policy4")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6")])[lower.tri(x=cor(voter_matrix[,c("abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6")]), diag = FALSE)])),
#cor(voter_matrix[,c("gays_policy1","gays_policy1")])[lower.tri(x=cor(voter_matrix[,c()]), diag = FALSE)],
  mean(abs(cor(voter_matrix[,c("immi_policy1","immi_policy2","immi_policy3","immi_policy4","immi_policy5","immi_policy6","immi_policy7","immi_policy8")])[lower.tri(x=cor(voter_matrix[,c("immi_policy1","immi_policy2","immi_policy3","immi_policy4","immi_policy5","immi_policy6","immi_policy7","immi_policy8")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("roll_tpp_act","roll_trd_adj")])[lower.tri(x=cor(voter_matrix[,c("roll_tpp_act","roll_trd_adj")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("guns_policy1","guns_policy2","guns_policy3","guns_policy4")])[lower.tri(x=cor(voter_matrix[,c("guns_policy1","guns_policy2","guns_policy3","guns_policy4")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("dfns_policy2","roll_usa_fre","roll_iransct")])[lower.tri(x=cor(voter_matrix[,c("dfns_policy2","roll_usa_fre","roll_iransct")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage")])[lower.tri(x=cor(voter_matrix[,c("roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("roll_medicar","roll_rpl_aca","roll_minwage")])[lower.tri(x=cor(voter_matrix[,c("roll_medicar","roll_rpl_aca","roll_minwage")]), diag = FALSE)]))
)

mean(c(mean(abs(cor(voter_matrix[,c("envs_policy1","envs_policy2","envs_policy3","envs_policy4")])[lower.tri(x=cor(voter_matrix[,c("envs_policy1","envs_policy2","envs_policy3","envs_policy4")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("crme_policy1","crme_policy2","crme_policy3","crme_policy4")])[lower.tri(x=cor(voter_matrix[,c("crme_policy1","crme_policy2","crme_policy3","crme_policy4")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6")])[lower.tri(x=cor(voter_matrix[,c("abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6")]), diag = FALSE)])),
#cor(voter_matrix[,c("gays_policy1","gays_policy1")])[lower.tri(x=cor(voter_matrix[,c()]), diag = FALSE)],
  mean(abs(cor(voter_matrix[,c("immi_policy1","immi_policy2","immi_policy3","immi_policy4","immi_policy5","immi_policy6","immi_policy7","immi_policy8")])[lower.tri(x=cor(voter_matrix[,c("immi_policy1","immi_policy2","immi_policy3","immi_policy4","immi_policy5","immi_policy6","immi_policy7","immi_policy8")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("roll_tpp_act","roll_trd_adj")])[lower.tri(x=cor(voter_matrix[,c("roll_tpp_act","roll_trd_adj")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("guns_policy1","guns_policy2","guns_policy3","guns_policy4")])[lower.tri(x=cor(voter_matrix[,c("guns_policy1","guns_policy2","guns_policy3","guns_policy4")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("dfns_policy2","roll_usa_fre","roll_iransct")])[lower.tri(x=cor(voter_matrix[,c("dfns_policy2","roll_usa_fre","roll_iransct")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage")])[lower.tri(x=cor(voter_matrix[,c("roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage")]), diag = FALSE)])),
  mean(abs(cor(voter_matrix[,c("roll_medicar","roll_rpl_aca","roll_minwage")])[lower.tri(x=cor(voter_matrix[,c("roll_medicar","roll_rpl_aca","roll_minwage")]), diag = FALSE)]))
)
)
#  0.2176849 mean inter-summary correlation

################################################################################
################################################################################
# above is summary
# below is getting to analysis =>
#		step 1: classify empirical proportion of (un)sophisticates from our measures
#   step 2: produce stratifications on our scaled attitudes, natural knowledge scales
#   	=> X, scaled(Y), X by scaled(Y)
################################################################################
################################################################################

# K&K as useful guide

#########################################
#1. Use the self-placement scale
mean(abs(voter_matrix$self_place[c(id,ir)])==3)
#[1] 0.2414475
mean(abs(voter_matrix$self_place[c(id,ir)])>=2)
#[1] 0.5501285
mean(abs(voter_matrix$self_place[c(id,ir)])>=1)
#[1] 0.7741744

#########################################
#2. Knowledge
table(rowSums(Xo))/sum(table(rowSums(Xo)))
#0           1           2           3           4           5           6           7
#0.009689539 0.048447696 0.062487641 0.082855448 0.099861578 0.126952739 0.169863555 0.399841803
# 57% 6 or 7 correct; 40%  get all 7

#########################################
#3. Policy (in)consistency on 10 issue areas
table(xmu)/sum(table(xmu))
table(ymu)/sum(table(ymu))

c(mean(ymu==10),mean(ymu>=9),mean(ymu>=8),mean(ymu>=7),
mean(ymu>=6),mean(ymu>=5),mean(ymu>=4),mean(ymu>=3),
mean(ymu>=2),mean(ymu>=1))
#0.02669567 # all 10 consistent
#0.17124778 # 9 or 10
#0.36444532 # 8, 9, or 10
#0.52778327 # 7, 8, 9, or 10

c(mean(xmu==0),mean(xmu<=1),mean(xmu<=2),mean(xmu<=3),
mean(xmu<=4),mean(xmu<=5),mean(xmu<=6),mean(xmu<=7),
mean(xmu<=8),mean(xmu<=9))
#0.1465296 # 0 inconsistent positions
#0.4421594 # 1 or fewer inconsistent

#########################################
#4. Stability; aggregated estimates using pre/post

#########################################
#5. Interaction is take top scorers or knowledge and consistency
#c(id,ir)[which(xmu<=0)] #15%
#c(id,ir)[which(rowSums(Xo)>=7)] #40%
#c(id,ir)[which(xmu<=0&rowSums(Xo)>=7)] #9%

#c(id,ir)[which(xmu<=1)] #44%
#c(id,ir)[which(rowSums(Xo)>=6)] #57%
#c(id,ir)[which(xmu<=1 & rowSums(Xo)>=6)] #33%

# reasonable enough, though can also use scaling below, to get to 20 to 30% ....
rnk=(rank(thetaK[c(id,ir)])+rank(abs(thetaZ)[c(id,ir)]))/2
#quantile(rnk,prob=c(.7,.8))
#c(id,ir)[which(rnk>=3411.5)] #30% thresh
#c(id,ir)[which(rnk>=3788.4)] #20% thresh
rnk2=(xmu/10+rowSums(Xo)/7)/2

#included1=array(0,length(rnk))
#included2=array(0,length(rnk))

H_included1 = rnk>=quantile(rnk,prob=c(.7,.8))[1]
H_included2 = xmu<=1 & rowSums(Xo)>=6

#mean(xmu<=1 & rowSums(Xo)>=6)
#[1] 0.3296421

# included sophisticateds
#c(id,ir)[H_included1]
#c(id,ir)[H_included2]

# excluded non-sophisticateds
#c(id,ir)[!H_included1]
#c(id,ir)[!H_included2]

#quantile(rnk,prob=c(.2,.3))
L_included1 = rnk<=quantile(rnk,prob=c(.2,.3))[2]
L_included2 = xmu>=2 & rowSums(Xo)<=5

mean(xmu>=2 & rowSums(Xo)<=5)
#[1] 0.3177773

# alternatively take the average of these
#H_included2 = rnk2>quantile(rnk2,prob=c(.3,.7))[2]
#L_included2 = rnk2<=quantile(rnk2,prob=c(.3,.7))[1]
# excluded least-sophisticateds
#c(id,ir)[L_included1]
#c(id,ir)[L_included2]

M_included1 = !H_included1 & !L_included1
M_included2 = !H_included2 & !L_included2
mean( M_included2)
#[1] 0.3525806

##voter_matrix$respondent[c(id,ir)]
##voter_matrix$pid3[c(id,ir)]

###############
# descriptives of sophistication indices x validated turnout
load('data/sophistication_indices.Rdata')
library(xtable)

L_ix2=indices$resp_id[indices$"L_ix2"]
M_ix2=indices$resp_id[indices$"M_ix2"]
H_ix2=indices$resp_id[indices$"H_ix2"]

candidate_matrix$sophisticated_level=NA
for(i in 1:length(L_ix2)){
	iq=which(L_ix2[i]==candidate_matrix$respondent)
	candidate_matrix$sophisticated_level[iq]='L'
}
for(i in 1:length(M_ix2)){
	iq=which(M_ix2[i]==candidate_matrix$respondent)
	candidate_matrix$sophisticated_level[iq]='M'
}
for(i in 1:length(H_ix2)){
	iq=which(H_ix2[i]==candidate_matrix$respondent)
	candidate_matrix$sophisticated_level[iq]='H'
}


table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),]
table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),]

table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),2]/sum(table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),2])
#H         M         L
#0.4748879 0.3721973 0.1529148

table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),1]/sum(table(candidate_matrix$sophisticated_level,candidate_matrix$valid_primary)[c(1,3,2),1])
#H         M         L
#0.2746807 0.3492111 0.3761082

table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),2]/sum(table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),2])
#H         M         L
#0.4033544 0.3656860 0.2309596

table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),1]/sum(table(candidate_matrix$sophisticated_level,candidate_matrix$valid_general)[c(1,3,2),1])
#H         M         L
#0.2705793 0.3475610 0.3818598

#END
